{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Working with Text Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Hi Guys, Welcome to [Tirendaz Academy](https://youtube.com/c/tirendazacademy) 😀\n",
    "</br>\n",
    "In this notebook, I'm going to show how to work with text data in Pandas.\n",
    "</br>\n",
    "Happy learning 🐱‍🏍 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'HELLO'"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\"hello\".upper()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Vectorized String Functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "data=[\"tim\",\"Kate\",\"SUSan\",np.nan,\"aLEX\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "name=pd.Series(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      Tim\n",
       "1     Kate\n",
       "2    Susan\n",
       "3      NaN\n",
       "4     Alex\n",
       "dtype: object"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "name.str.capitalize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      tim\n",
       "1     kate\n",
       "2    susan\n",
       "3      NaN\n",
       "4     alex\n",
       "dtype: object"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "name.str.lower()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    3.0\n",
       "1    4.0\n",
       "2    5.0\n",
       "3    NaN\n",
       "4    4.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "name.str.len()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    False\n",
       "1    False\n",
       "2    False\n",
       "3      NaN\n",
       "4     True\n",
       "dtype: object"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "name.str.startswith(\"a\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Column A</th>\n",
       "      <th>Column B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.459978</td>\n",
       "      <td>0.200495</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.739367</td>\n",
       "      <td>-2.557691</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.371356</td>\n",
       "      <td>0.086189</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Column A  Column B\n",
       "0 -0.459978  0.200495\n",
       "1  0.739367 -2.557691\n",
       "2  0.371356  0.086189"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df=pd.DataFrame(\n",
    "    np.random.randn(3,2),\n",
    "    columns=[\"Column A\",\"Column B\"],\n",
    "    index=range(3))\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Column A', 'Column B'], dtype='object')"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['column_a', 'column_b'], dtype='object')"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns.str.lower().str.replace(\" \",\"_\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    a_b_c\n",
       "1    c_d_e\n",
       "2      NaN\n",
       "3    f_g_h\n",
       "dtype: object"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s=pd.Series([\"a_b_c\",\"c_d_e\",np.nan,\"f_g_h\"])\n",
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      b\n",
       "1      d\n",
       "2    NaN\n",
       "3      g\n",
       "dtype: object"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s.str.split(\"_\").str[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>a</td>\n",
       "      <td>b_c</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>c</td>\n",
       "      <td>d_e</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>f</td>\n",
       "      <td>g_h</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     0    1\n",
       "0    a  b_c\n",
       "1    c  d_e\n",
       "2  NaN  NaN\n",
       "3    f  g_h"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s.str.split(\"_\",expand=True,n=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        15\n",
       "1      -$20\n",
       "2    $30000\n",
       "dtype: object"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "money=pd.Series([\"15\",\"-$20\",\"$30000\"])\n",
    "money"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        15\n",
       "1        20\n",
       "2    $30000\n",
       "dtype: object"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "money.str.replace(\"-\\$\",\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        15\n",
       "1       -20\n",
       "2    $30000\n",
       "dtype: object"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "money.str.replace(\"-\\$\",\"-\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can use google or pandas.pydata.org to see the string methods of Pandas documentation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "film=pd.read_csv(\"http://bit.ly/imdbratings\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>star_rating</th>\n",
       "      <th>title</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>genre</th>\n",
       "      <th>duration</th>\n",
       "      <th>actors_list</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>9.3</td>\n",
       "      <td>The Shawshank Redemption</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>142</td>\n",
       "      <td>[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>9.2</td>\n",
       "      <td>The Godfather</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>175</td>\n",
       "      <td>[u'Marlon Brando', u'Al Pacino', u'James Caan']</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9.1</td>\n",
       "      <td>The Godfather: Part II</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>200</td>\n",
       "      <td>[u'Al Pacino', u'Robert De Niro', u'Robert Duv...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>9.0</td>\n",
       "      <td>The Dark Knight</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>Action</td>\n",
       "      <td>152</td>\n",
       "      <td>[u'Christian Bale', u'Heath Ledger', u'Aaron E...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8.9</td>\n",
       "      <td>Pulp Fiction</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>154</td>\n",
       "      <td>[u'John Travolta', u'Uma Thurman', u'Samuel L....</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   star_rating                     title content_rating   genre  duration  \\\n",
       "0          9.3  The Shawshank Redemption              R   Crime       142   \n",
       "1          9.2             The Godfather              R   Crime       175   \n",
       "2          9.1    The Godfather: Part II              R   Crime       200   \n",
       "3          9.0           The Dark Knight          PG-13  Action       152   \n",
       "4          8.9              Pulp Fiction              R   Crime       154   \n",
       "\n",
       "                                         actors_list  \n",
       "0  [u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt...  \n",
       "1    [u'Marlon Brando', u'Al Pacino', u'James Caan']  \n",
       "2  [u'Al Pacino', u'Robert De Niro', u'Robert Duv...  \n",
       "3  [u'Christian Bale', u'Heath Ledger', u'Aaron E...  \n",
       "4  [u'John Travolta', u'Uma Thurman', u'Samuel L....  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "film.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0                             THE SHAWSHANK REDEMPTION\n",
       "1                                        THE GODFATHER\n",
       "2                               THE GODFATHER: PART II\n",
       "3                                      THE DARK KNIGHT\n",
       "4                                         PULP FICTION\n",
       "                            ...                       \n",
       "974                                            TOOTSIE\n",
       "975                        BACK TO THE FUTURE PART III\n",
       "976    MASTER AND COMMANDER: THE FAR SIDE OF THE WORLD\n",
       "977                                        POLTERGEIST\n",
       "978                                        WALL STREET\n",
       "Name: title, Length: 979, dtype: object"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "film.title.str.upper()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "film.columns=film.columns.str.capitalize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Star_rating</th>\n",
       "      <th>Title</th>\n",
       "      <th>Content_rating</th>\n",
       "      <th>Genre</th>\n",
       "      <th>Duration</th>\n",
       "      <th>Actors_list</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>9.3</td>\n",
       "      <td>The Shawshank Redemption</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>142</td>\n",
       "      <td>[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>9.2</td>\n",
       "      <td>The Godfather</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>175</td>\n",
       "      <td>[u'Marlon Brando', u'Al Pacino', u'James Caan']</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9.1</td>\n",
       "      <td>The Godfather: Part II</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>200</td>\n",
       "      <td>[u'Al Pacino', u'Robert De Niro', u'Robert Duv...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>9.0</td>\n",
       "      <td>The Dark Knight</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>Action</td>\n",
       "      <td>152</td>\n",
       "      <td>[u'Christian Bale', u'Heath Ledger', u'Aaron E...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8.9</td>\n",
       "      <td>Pulp Fiction</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>154</td>\n",
       "      <td>[u'John Travolta', u'Uma Thurman', u'Samuel L....</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Star_rating                     Title Content_rating   Genre  Duration  \\\n",
       "0          9.3  The Shawshank Redemption              R   Crime       142   \n",
       "1          9.2             The Godfather              R   Crime       175   \n",
       "2          9.1    The Godfather: Part II              R   Crime       200   \n",
       "3          9.0           The Dark Knight          PG-13  Action       152   \n",
       "4          8.9              Pulp Fiction              R   Crime       154   \n",
       "\n",
       "                                         Actors_list  \n",
       "0  [u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt...  \n",
       "1    [u'Marlon Brando', u'Al Pacino', u'James Caan']  \n",
       "2  [u'Al Pacino', u'Robert De Niro', u'Robert Duv...  \n",
       "3  [u'Christian Bale', u'Heath Ledger', u'Aaron E...  \n",
       "4  [u'John Travolta', u'Uma Thurman', u'Samuel L....  "
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "film.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Star_rating</th>\n",
       "      <th>Title</th>\n",
       "      <th>Content_rating</th>\n",
       "      <th>Genre</th>\n",
       "      <th>Duration</th>\n",
       "      <th>Actors_list</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>8.9</td>\n",
       "      <td>Fight Club</td>\n",
       "      <td>R</td>\n",
       "      <td>Drama</td>\n",
       "      <td>139</td>\n",
       "      <td>[u'Brad Pitt', u'Edward Norton', u'Helena Bonh...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>8.7</td>\n",
       "      <td>Se7en</td>\n",
       "      <td>R</td>\n",
       "      <td>Drama</td>\n",
       "      <td>127</td>\n",
       "      <td>[u'Morgan Freeman', u'Brad Pitt', u'Kevin Spac...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>106</th>\n",
       "      <td>8.3</td>\n",
       "      <td>Snatch.</td>\n",
       "      <td>R</td>\n",
       "      <td>Comedy</td>\n",
       "      <td>102</td>\n",
       "      <td>[u'Jason Statham', u'Brad Pitt', u'Benicio Del...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>114</th>\n",
       "      <td>8.3</td>\n",
       "      <td>Inglourious Basterds</td>\n",
       "      <td>R</td>\n",
       "      <td>Adventure</td>\n",
       "      <td>153</td>\n",
       "      <td>[u'Brad Pitt', u'Diane Kruger', u'Eli Roth']</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>264</th>\n",
       "      <td>8.1</td>\n",
       "      <td>Twelve Monkeys</td>\n",
       "      <td>R</td>\n",
       "      <td>Mystery</td>\n",
       "      <td>129</td>\n",
       "      <td>[u'Bruce Willis', u'Madeleine Stowe', u'Brad P...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>508</th>\n",
       "      <td>7.8</td>\n",
       "      <td>The Curious Case of Benjamin Button</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>Drama</td>\n",
       "      <td>166</td>\n",
       "      <td>[u'Brad Pitt', u'Cate Blanchett', u'Tilda Swin...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>577</th>\n",
       "      <td>7.8</td>\n",
       "      <td>Ocean's Eleven</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>Crime</td>\n",
       "      <td>116</td>\n",
       "      <td>[u'George Clooney', u'Brad Pitt', u'Julia Robe...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>683</th>\n",
       "      <td>7.7</td>\n",
       "      <td>Fury</td>\n",
       "      <td>R</td>\n",
       "      <td>Action</td>\n",
       "      <td>134</td>\n",
       "      <td>[u'Brad Pitt', u'Shia LaBeouf', u'Logan Lerman']</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>776</th>\n",
       "      <td>7.6</td>\n",
       "      <td>Moneyball</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>Biography</td>\n",
       "      <td>133</td>\n",
       "      <td>[u'Brad Pitt', u'Robin Wright', u'Jonah Hill']</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>779</th>\n",
       "      <td>7.6</td>\n",
       "      <td>Interview with the Vampire: The Vampire Chroni...</td>\n",
       "      <td>R</td>\n",
       "      <td>Horror</td>\n",
       "      <td>123</td>\n",
       "      <td>[u'Brad Pitt', u'Tom Cruise', u'Antonio Bander...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>807</th>\n",
       "      <td>7.6</td>\n",
       "      <td>The Assassination of Jesse James by the Coward...</td>\n",
       "      <td>R</td>\n",
       "      <td>Biography</td>\n",
       "      <td>160</td>\n",
       "      <td>[u'Brad Pitt', u'Casey Affleck', u'Sam Shepard']</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>826</th>\n",
       "      <td>7.5</td>\n",
       "      <td>Sleepers</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>147</td>\n",
       "      <td>[u'Robert De Niro', u'Kevin Bacon', u'Brad Pitt']</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>877</th>\n",
       "      <td>7.5</td>\n",
       "      <td>Legends of the Fall</td>\n",
       "      <td>R</td>\n",
       "      <td>Drama</td>\n",
       "      <td>133</td>\n",
       "      <td>[u'Brad Pitt', u'Anthony Hopkins', u'Aidan Qui...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>901</th>\n",
       "      <td>7.5</td>\n",
       "      <td>Babel</td>\n",
       "      <td>R</td>\n",
       "      <td>Drama</td>\n",
       "      <td>143</td>\n",
       "      <td>[u'Brad Pitt', u'Cate Blanchett', u'Gael Garc\\...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Star_rating                                              Title  \\\n",
       "9            8.9                                         Fight Club   \n",
       "24           8.7                                              Se7en   \n",
       "106          8.3                                            Snatch.   \n",
       "114          8.3                               Inglourious Basterds   \n",
       "264          8.1                                     Twelve Monkeys   \n",
       "508          7.8                The Curious Case of Benjamin Button   \n",
       "577          7.8                                     Ocean's Eleven   \n",
       "683          7.7                                               Fury   \n",
       "776          7.6                                          Moneyball   \n",
       "779          7.6  Interview with the Vampire: The Vampire Chroni...   \n",
       "807          7.6  The Assassination of Jesse James by the Coward...   \n",
       "826          7.5                                           Sleepers   \n",
       "877          7.5                                Legends of the Fall   \n",
       "901          7.5                                              Babel   \n",
       "\n",
       "    Content_rating      Genre  Duration  \\\n",
       "9                R      Drama       139   \n",
       "24               R      Drama       127   \n",
       "106              R     Comedy       102   \n",
       "114              R  Adventure       153   \n",
       "264              R    Mystery       129   \n",
       "508          PG-13      Drama       166   \n",
       "577          PG-13      Crime       116   \n",
       "683              R     Action       134   \n",
       "776          PG-13  Biography       133   \n",
       "779              R     Horror       123   \n",
       "807              R  Biography       160   \n",
       "826              R      Crime       147   \n",
       "877              R      Drama       133   \n",
       "901              R      Drama       143   \n",
       "\n",
       "                                           Actors_list  \n",
       "9    [u'Brad Pitt', u'Edward Norton', u'Helena Bonh...  \n",
       "24   [u'Morgan Freeman', u'Brad Pitt', u'Kevin Spac...  \n",
       "106  [u'Jason Statham', u'Brad Pitt', u'Benicio Del...  \n",
       "114       [u'Brad Pitt', u'Diane Kruger', u'Eli Roth']  \n",
       "264  [u'Bruce Willis', u'Madeleine Stowe', u'Brad P...  \n",
       "508  [u'Brad Pitt', u'Cate Blanchett', u'Tilda Swin...  \n",
       "577  [u'George Clooney', u'Brad Pitt', u'Julia Robe...  \n",
       "683   [u'Brad Pitt', u'Shia LaBeouf', u'Logan Lerman']  \n",
       "776     [u'Brad Pitt', u'Robin Wright', u'Jonah Hill']  \n",
       "779  [u'Brad Pitt', u'Tom Cruise', u'Antonio Bander...  \n",
       "807   [u'Brad Pitt', u'Casey Affleck', u'Sam Shepard']  \n",
       "826  [u'Robert De Niro', u'Kevin Bacon', u'Brad Pitt']  \n",
       "877  [u'Brad Pitt', u'Anthony Hopkins', u'Aidan Qui...  \n",
       "901  [u'Brad Pitt', u'Cate Blanchett', u'Gael Garc\\...  "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "film[film.Actors_list.str.contains(\n",
    "    \"Brad Pitt\")]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      u'Tim Robbins', u'Morgan Freeman', u'Bob Gunton']\n",
       "1         u'Marlon Brando', u'Al Pacino', u'James Caan']\n",
       "2      u'Al Pacino', u'Robert De Niro', u'Robert Duva...\n",
       "3      u'Christian Bale', u'Heath Ledger', u'Aaron Ec...\n",
       "4      u'John Travolta', u'Uma Thurman', u'Samuel L. ...\n",
       "                             ...                        \n",
       "974    u'Dustin Hoffman', u'Jessica Lange', u'Teri Ga...\n",
       "975    u'Michael J. Fox', u'Christopher Lloyd', u'Mar...\n",
       "976    u'Russell Crowe', u'Paul Bettany', u'Billy Boyd']\n",
       "977    u'JoBeth Williams', u\"Heather O'Rourke\", u'Cra...\n",
       "978    u'Charlie Sheen', u'Michael Douglas', u'Tamara...\n",
       "Name: Actors_list, Length: 979, dtype: object"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "film.Actors_list.str.replace(\"[\",\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       u'Tim Robbins', u'Morgan Freeman', u'Bob Gunton'\n",
       "1          u'Marlon Brando', u'Al Pacino', u'James Caan'\n",
       "2      u'Al Pacino', u'Robert De Niro', u'Robert Duvall'\n",
       "3      u'Christian Bale', u'Heath Ledger', u'Aaron Ec...\n",
       "4      u'John Travolta', u'Uma Thurman', u'Samuel L. ...\n",
       "                             ...                        \n",
       "974    u'Dustin Hoffman', u'Jessica Lange', u'Teri Garr'\n",
       "975    u'Michael J. Fox', u'Christopher Lloyd', u'Mar...\n",
       "976     u'Russell Crowe', u'Paul Bettany', u'Billy Boyd'\n",
       "977    u'JoBeth Williams', u\"Heather O'Rourke\", u'Cra...\n",
       "978    u'Charlie Sheen', u'Michael Douglas', u'Tamara...\n",
       "Name: Actors_list, Length: 979, dtype: object"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "film.Actors_list.str.replace(\n",
    "    \"[\",\"\").str.replace(\"]\",\"\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Don't forget to follow us on [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | [Twitter](http://twitter.com/tirendazacademy) | [GitHub](http://github.com/tirendazacademy) | [Linkedin](https://www.linkedin.com/in/tirendaz-academy) | [Kaggle](https://www.kaggle.com/tirendazacademy) 😎"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
